1 Introduction

The purpose of this notebook is to ensure that the Richter-like cells predicted at the initial time points of the disease do not correspond to doublets. To that end, we will merge the RT-CLL predicted doublets detected by cell hashing with the filtered Seurat objects. This ground-truth doublets should cluster at the interface between CLL and RT clusters, while the RT-like cells should be embedded in the clusters.

2 Pre-processing

2.1 Load packages

library(Seurat)
library(ggpubr)
library(ggridges)
library(tidyverse)

2.2 Define parameters

path_to_obj <- here::here("results/R_objects/patient_63/7.seurat_list_annotated_reprocessed.rds")
path_to_demultiplex <- here::here("results/R_objects/demultiplexed")
path_to_libraries_metadata <- here::here("1-cellranger_mapping/data/richter_metadata.csv")
path_to_sample_metdata <- here::here("data/sample_metadata_FN.csv")
path_to_richter_signature <- here::here("results/R_objects/richter_signatures_list.rds")
path_to_cll_signature <- here::here("results/R_objects/cll_signatures_list.rds")


# Colors
color_palette <- c("black", "gray", "red", "yellow", "violet", "green4",
                   "blue", "mediumorchid2", "coral2", "blueviolet",
                   "indianred4", "deepskyblue1", "dimgray", "deeppink1",
                   "green", "lightgray", "hotpink1")


# Source functions
source(here::here("bin/utils.R"))

2.3 Load data

seurat_list <- readRDS(path_to_obj)
seurat_list$`12`$annotation_final <- Idents(seurat_list$`12`)
seurat_list$`19`$annotation_final <- Idents(seurat_list$`19`)
seurat_list$`3299`$annotation_final <- Idents(seurat_list$`3299`)
seurat_list$`3299`$is_richter <- ifelse(
  seurat_list$`3299`$time_point %in% c("T2", "T3"),
  TRUE,
  FALSE
)
seurat_list
## $`12`
## An object of class Seurat 
## 23403 features across 5913 samples within 1 assay 
## Active assay: RNA (23403 features, 2000 variable features)
##  4 dimensional reductions calculated: pca, harmony, umap, umap_3D
## 
## $`19`
## An object of class Seurat 
## 23403 features across 7742 samples within 1 assay 
## Active assay: RNA (23403 features, 2000 variable features)
##  4 dimensional reductions calculated: pca, harmony, umap, umap_3D
## 
## $`3299`
## An object of class Seurat 
## 23403 features across 6037 samples within 1 assay 
## Active assay: RNA (23403 features, 2000 variable features)
##  4 dimensional reductions calculated: pca, harmony, umap, umap_3D
umaps_annotation <- purrr::map(
  seurat_list,
  DimPlot,
  cols = color_palette,
  pt.size = 0.5,
  group.by = "annotation_final"
)
umaps_annotation
## $`12`

## 
## $`19`

## 
## $`3299`

# Load demultiplexed files (with doublets)
files_to_load <- list.files(path_to_demultiplex, full.names = TRUE)
gem_ids <- list.files(path_to_demultiplex, full.names = FALSE) %>%
  str_remove("^seurat_") %>%
  str_remove("_demultiplexed.rds$")
names(files_to_load) <- gem_ids
seurat_demultiplexed <- purrr::map(files_to_load, readRDS)


# Load metadata files
libraries_metadata <- read_csv(path_to_libraries_metadata)
samples_metadata <- read_csv(path_to_sample_metdata)
DT::datatable(libraries_metadata)
DT::datatable(samples_metadata)
# Load signatures
richter_signatures <- readRDS(path_to_richter_signature)
cll_signatures <- readRDS(path_to_cll_signature)

3 Keep RT-CLL doublets and subset

seurat_demultiplexed_12 <- merge(
  x = seurat_demultiplexed$r65huxjg_g0itbe2k,
  y = seurat_demultiplexed$bco1we6n_qaz6s7wd
)
seurat_demultiplexed_19 <- merge(
  x = seurat_demultiplexed$te04uhdq_2uatjn5y,
  y = seurat_demultiplexed$j7z61du3_vyxon6q4
)
seurat_demultiplexed_3299 <- merge(
  x = seurat_demultiplexed$p092mcjf_6w9km8re,
  y = seurat_demultiplexed$z97dxyfq_xi38kvhc
)
seurat_demultiplexed <- list(
  "12" = seurat_demultiplexed_12,
  "19" = seurat_demultiplexed_19,
  "3299" = seurat_demultiplexed_3299
)
rm(seurat_demultiplexed_12)
rm(seurat_demultiplexed_19)
rm(seurat_demultiplexed_3299)


richter_samples <- c(
  "ICGC-365-15-194",
  "ICGC-01918-3879A",
  "ICGC-012-19-4600B",
  "ICGC-3299-17-251",
  "ICGC-3299-18-218A",
  "ICGC-3299-18-5761A"
)
richter_annotations <- c(
  "CCND2hi Richter-like quiescent",
  "CCND2lo Richter-like quiescent",
  "Richter-like proliferative",
  "TP53INP1hi Richter-like quiescent",
  "MIR155HGhi CD20hi Richter-like quiescent",
  "Richter-like proliferative",
  "Richter-like"
)
seurat_list <- purrr::map(seurat_list, function(seurat_obj) {
  seurat_obj$annotation_final <- as.character(seurat_obj$annotation_final)
  seurat_obj
})
seurat_demultiplexed2 <- purrr::map(names(seurat_list), function(x) {
  rt_cll_doublets <- 
    (seurat_demultiplexed[[x]]$HTO_maxID %in% richter_samples |
       seurat_demultiplexed[[x]]$HTO_secondID %in% richter_samples) &
    seurat_demultiplexed[[x]]$HTO_classification.global == "Doublet"
  rt_cll_doublets <- colnames(seurat_demultiplexed[[x]])[rt_cll_doublets]
  rt_seed_cells <- 
    seurat_list[[x]]$is_richter == FALSE &
    seurat_list[[x]]$annotation_final %in% richter_annotations
  rt_seed_cells <- colnames(seurat_list[[x]])[rt_seed_cells]
  background_cells <- !(colnames(seurat_list[[x]]) %in% rt_seed_cells)
  background_cells <- colnames(seurat_list[[x]])[background_cells]
  selected_cells <- Reduce(
    union,
    list(rt_cll_doublets, rt_seed_cells, background_cells)
  )
  seurat_obj <- subset(seurat_demultiplexed[[x]], cells = selected_cells)  
  seurat_obj$gem_id <- str_sub(colnames(seurat_obj), start = 1, end = 17)
  seurat_obj$classification <- case_when(
    colnames(seurat_obj) %in% rt_cll_doublets ~ "RT-CLL doublets",
    colnames(seurat_obj) %in% rt_seed_cells ~ "RT seeds",
    colnames(seurat_obj) %in% background_cells ~ "Background",
  )
  seurat_obj$annotation_final <- "unannotated"
  seurat_obj$annotation_final[background_cells] <- seurat_list[[x]]$annotation_final[background_cells]
  seurat_obj$annotation_final[rt_seed_cells] <- seurat_list[[x]]$annotation_final[rt_seed_cells]
  seurat_obj$annotation_final[rt_cll_doublets] <- "RT-CLL doublets"
  seurat_obj$donor_id <- x
  Idents(seurat_obj) <- "annotation_final"
  seurat_obj
})
names(seurat_demultiplexed2) <- names(seurat_list)
rm(seurat_demultiplexed)

4 Reprocess merged objects

seurat_demultiplexed2 <- purrr::map(seurat_demultiplexed2, NormalizeData)
seurat_demultiplexed2 <- purrr::map(
  seurat_demultiplexed2,
  process_seurat,
  dims = 1:20
)
purrr::map(seurat_demultiplexed2, DimPlot, pt.size = 0.5, cols = color_palette)
## $`12`

## 
## $`19`

## 
## $`3299`

5 Plot

First, we will assess whether Richter transformation (RT) “seed” cells colocalize with doublets:

# Split UMAP by background, "seed cells" and RT-CLL doublets
umap_doublets <- purrr::map(
  seurat_demultiplexed2,
  plot_split_umap,
  var = "classification"
)
umap_doublets
## $`12`

## 
## $`19`

## 
## $`3299`

6 Hashtag Oligonucleotide distribution

Next, we seek to elucidate the HTO of the RT samples in seed cells. RT seed cells which, by definition, are classified as singlets of a non-RT time-point. Thus, if they are not doublets they should not have a significantly higher expression of the RT HTO:

# Patients 12 and 19
# Violin plots
violin_plots_12_19 <- purrr::map(
  seurat_demultiplexed2[c("12", "19")],
  function(seurat_obj) {
    DefaultAssay(seurat_obj) <- "HTO"
    selected_features <- rownames(seurat_obj)[rownames(seurat_obj) %in% richter_samples]
    seurat_obj <- subset(seurat_obj,  features = selected_features)
    seurat_obj$richter_hto <- seurat_obj[["HTO"]]@data[1, ]
    p <- seurat_obj@meta.data %>%
      ggplot(aes(classification, richter_hto, fill = classification)) +
        geom_violin() +
        facet_wrap(~gem_id) +
        labs(x = "", y = "RT HTO normalized expression", fill = "") +
        theme_bw() +
        theme(legend.position = "none")
    p
  })
violin_plots_12_19
## $`12`

## 
## $`19`

# Ridge plots
density_plots_12_19 <- purrr::map(
  seurat_demultiplexed2[c("12", "19")],
  function(seurat_obj) {
  DefaultAssay(seurat_obj) <- "HTO"
  selected_features <- rownames(seurat_obj)[rownames(seurat_obj) %in% richter_samples]
  seurat_obj <- subset(seurat_obj,  features = selected_features)
  seurat_obj$richter_hto <- seurat_obj[["HTO"]]@data[1, ]
  p <- seurat_obj@meta.data %>%
    ggplot(aes(x = richter_hto, y = classification, fill = classification)) +
      geom_density_ridges() +
      facet_wrap(~gem_id) +
      labs(x = "RT HTO normalized expression", y = "", fill = "") +
      theme_bw() +
      theme(legend.position = "none")
  p
})
density_plots_12_19
## $`12`

## 
## $`19`

7 RT and CLL signatures

Finally, we will score each cell with a RT or CLL-specific signature, which we calculated in another notebook. If the RT seed cells are truly Richter-like, they should have a high RT signature and low CLL signature. On the other hand, if they are RT-CLL doublets, they will have an intermediate signature in both:

# Calculate signatures
patients <- names(seurat_demultiplexed2)
seurat_demultiplexed2 <- purrr::map(patients, function(x) {
  seurat_obj <- seurat_demultiplexed2[[x]]
  DefaultAssay(seurat_obj) <- "RNA"
  seurat_obj <- AddModuleScore(
    seurat_obj,
    features = list(
      cll_signature = cll_signatures[[x]],
      richter_signature = richter_signatures[[x]]
    ),
    name = c("cll_signature", "richter_signature")
  )
  seurat_obj
})
names(seurat_demultiplexed2) <- patients


# Plot
violin_plots_signatures <- purrr::map(
  seurat_demultiplexed2,
  function(seurat_obj) {
    cll_p <- seurat_obj@meta.data %>%
      ggplot(aes(classification, cll_signature1, fill = classification)) +
        geom_violin() +
        labs(x = "", y = "CLL signature score") +
        theme_bw() +
        theme(legend.position = "none")
    richter_p <- seurat_obj@meta.data %>%
      ggplot(aes(classification, richter_signature2, fill = classification)) +
        geom_violin() +
        labs(x = "", y = "Richter signature score") +
        theme_bw() +
        theme(legend.position = "none")
    output <- ggarrange(plotlist = list(cll_p, richter_p), ncol = 2)
    output
})
violin_plots_signatures
## $`12`

## 
## $`19`

## 
## $`3299`

8 Session Info

sessionInfo()
## R version 4.0.4 (2021-02-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=es_ES.UTF-8        LC_COLLATE=en_US.UTF-8     LC_MONETARY=es_ES.UTF-8    LC_MESSAGES=en_US.UTF-8    LC_PAPER=es_ES.UTF-8       LC_NAME=C                  LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] forcats_0.5.1      stringr_1.4.0      dplyr_1.0.6        purrr_0.3.4        readr_1.4.0        tidyr_1.1.3        tibble_3.1.2       tidyverse_1.3.0    ggridges_0.5.3     ggpubr_0.4.0       ggplot2_3.3.3      SeuratObject_4.0.1 Seurat_4.0.1       BiocStyle_2.18.1  
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1          backports_1.2.1       plyr_1.8.6            igraph_1.2.6          lazyeval_0.2.2        splines_4.0.4         crosstalk_1.1.1       listenv_0.8.0         scattermore_0.7       digest_0.6.27         htmltools_0.5.1.1     fansi_0.4.2           magrittr_2.0.1        tensor_1.5            cluster_2.1.1         ROCR_1.0-11           openxlsx_4.2.3        globals_0.14.0        modelr_0.1.8          matrixStats_0.58.0    spatstat.sparse_2.0-0 colorspace_2.0-1      rvest_1.0.0           ggrepel_0.9.1         haven_2.3.1           xfun_0.22             crayon_1.4.1          jsonlite_1.7.2        spatstat.data_2.1-0   survival_3.2-10       zoo_1.8-9             glue_1.4.2            polyclip_1.10-0       gtable_0.3.0          leiden_0.3.7          car_3.0-10            future.apply_1.7.0    abind_1.4-5           scales_1.1.1          DBI_1.1.1             rstatix_0.7.0         miniUI_0.1.1.1        Rcpp_1.0.6            viridisLite_0.4.0     xtable_1.8-4          reticulate_1.20       spatstat.core_2.1-2   foreign_0.8-81        DT_0.17               htmlwidgets_1.5.3     httr_1.4.2            RColorBrewer_1.1-2    ellipsis_0.3.2        ica_1.0-2            
##  [55] farver_2.1.0          pkgconfig_2.0.3       sass_0.4.0            uwot_0.1.10           dbplyr_2.1.0          deldir_0.2-10         here_1.0.1            utf8_1.2.1            labeling_0.4.2        tidyselect_1.1.1      rlang_0.4.11          reshape2_1.4.4        later_1.2.0           munsell_0.5.0         cellranger_1.1.0      tools_4.0.4           cli_2.5.0             generics_0.1.0        broom_0.7.5           evaluate_0.14         fastmap_1.1.0         yaml_2.2.1            goftest_1.2-2         fs_1.5.0              knitr_1.31            fitdistrplus_1.1-3    zip_2.1.1             RANN_2.6.1            pbapply_1.4-3         future_1.21.0         nlme_3.1-152          mime_0.10             xml2_1.3.2            rstudioapi_0.13       compiler_4.0.4        plotly_4.9.3          curl_4.3.1            png_0.1-7             ggsignif_0.6.1        spatstat.utils_2.1-0  reprex_1.0.0          bslib_0.2.5           stringi_1.6.2         highr_0.8             RSpectra_0.16-0       lattice_0.20-41       Matrix_1.3-3          vctrs_0.3.8           pillar_1.6.1          lifecycle_1.0.0       BiocManager_1.30.10   spatstat.geom_2.1-0   lmtest_0.9-38         jquerylib_0.1.4      
## [109] RcppAnnoy_0.0.18      data.table_1.14.0     cowplot_1.1.1         irlba_2.3.3           httpuv_1.6.1          patchwork_1.1.1       R6_2.5.0              bookdown_0.21         promises_1.2.0.1      KernSmooth_2.23-18    gridExtra_2.3         rio_0.5.26            parallelly_1.25.0     codetools_0.2-18      MASS_7.3-53.1         assertthat_0.2.1      rprojroot_2.0.2       withr_2.4.2           sctransform_0.3.2     mgcv_1.8-34           parallel_4.0.4        hms_1.0.0             grid_4.0.4            rpart_4.1-15          rmarkdown_2.7         carData_3.0-4         Rtsne_0.15            shiny_1.6.0           lubridate_1.7.10